package com.kdpujie.alink.source;

import com.alibaba.alink.operator.batch.source.BaseSourceBatchOp;
import com.alibaba.alink.operator.batch.source.CsvSourceBatchOp;

/**
 * {@link Source} implementation that reads an Avazu CTR CSV file and exposes
 * the column names used with it.
 *
 * <p>The CSV schema declared in {@link #GetSource()} also contains the columns
 * {@code id}, {@code dt}, {@code app_id} and {@code device_ip}; none of the
 * column-name getters below return them.
 *
 * @param <T> the batch source operator type returned by {@link #GetSource()}
 */
public class AvazuCtrSource<T extends BaseSourceBatchOp<T>> implements Source<T> {

    /**
     * Returns the name of the label column.
     *
     * @return the literal {@code "click"}
     */
    @Override
    public String GetLabelName() {
        return "click";
    }

    /**
     * Returns the names of the columns treated as categorical.
     *
     * @return a newly allocated array of 11 column names
     */
    @Override
    public String[] GetCategoryColsName() {
        return new String[] {
            "C1",
            "banner_pos",
            "site_category",
            "app_domain",
            "app_category",
            "device_type",
            "device_conn_type",
            "site_id",
            "site_domain",
            "device_id",
            "device_model"
        };
    }

    /**
     * Returns the full list of column names: every name from
     * {@link #GetCategoryColsName()} and {@link #GetNumericalColsNames()},
     * in the order listed here (which differs from the order of those two arrays
     * concatenated).
     *
     * @return a newly allocated array of 19 column names
     */
    @Override
    public String[] GetColsName() {
        return new String[] {
            "C1",
            "banner_pos",
            "site_category",
            "app_domain",
            "app_category",
            "device_type",
            "device_conn_type",
            "C14",
            "C15",
            "C16",
            "C17",
            "C18",
            "C19",
            "C20",
            "C21",
            "site_id",
            "site_domain",
            "device_id",
            "device_model"
        };
    }

    /**
     * Returns the names of the columns treated as numerical.
     *
     * @return a newly allocated array containing {@code C14} through {@code C21}
     */
    @Override
    public String[] GetNumericalColsNames() {
        return new String[] {
            "C14",
            "C15",
            "C16",
            "C17",
            "C18",
            "C19",
            "C20",
            "C21"
        };
    }

    /**
     * Builds a CSV batch source over the small Avazu file on the local disk.
     *
     * <p>Datasets mentioned by the original author:
     * <ul>
     *   <li>Original training data, about 1.2G:
     *       {@code /Users/pujie/codes/data/avazu-ctr-train-8M.csv} or
     *       {@code https://ubix-bigdata-test.oss-cn-beijing-internal.aliyuncs.com/ml_test/avazu-ctr-train-8M.csv}.
     *       The previously used (now disabled) configuration for this file also
     *       called {@code setIgnoreFirstLine(true)}.</li>
     *   <li>Original test data, about 68M:
     *       {@code /Users/pujie/codes/data/avazu-small.csv} or
     *       {@code https://ubix-bigdata-test.oss-cn-beijing-internal.aliyuncs.com/ml_test/avazu-small.csv}.
     *       This is the file that is actually read.</li>
     * </ul>
     *
     * @return a {@link CsvSourceBatchOp} configured with the file path and schema,
     *         cast to {@code T}
     */
    @Override
    public T GetSource() {
        // Column declarations, in file order, as "name type" pairs.
        String schema =
            "id string, click string, dt string, "
            + "C1 string, banner_pos int, "
            + "site_id string, site_domain string, site_category string, "
            + "app_id string, app_domain string, app_category string, "
            + "device_id string, device_ip string, device_model string, "
            + "device_type string, device_conn_type string, "
            + "C14 int, C15 int, C16 int, C17 int, "
            + "C18 int, C19 int, C20 int, C21 int";

        CsvSourceBatchOp csvSource = new CsvSourceBatchOp()
            .setFilePath("/Users/pujie/codes/data/avazu-small.csv")
            .setSchemaStr(schema);

        // NOTE(review): unchecked cast. The object is always a CsvSourceBatchOp,
        // so this presumably only works when T is CsvSourceBatchOp - confirm
        // against callers.
        return (T) csvSource;
    }

}
